# Knit from the RStudio project root so the relative data/ and output/ paths
# used below resolve from one fixed place.
knitr::opts_knit$set(
  root.dir = rprojroot::find_rstudio_root_file()
)

# Cache chunk results; R.version is passed as extra cache input.
knitr::opts_chunk$set(
  cache.extra = R.version,
  cache = TRUE
)

# Bind reticulate to the "reticulate_PCHA" conda environment.
reticulate::use_condaenv(
  "reticulate_PCHA",
  required = TRUE,
  conda = "auto"
)
ParetoTI package documentation: https://vitkl.github.io/ParetoTI/
suppressPackageStartupMessages({
library(ParetoTI)
library(ggplot2)
library(ggfortify)
library(cowplot)
library(Matrix)
library(tidyverse)
library('biomaRt')
library('clusterProfiler')
library(vegan)
})
#source("scripts/utils/human2mouseHGNC.R")
Loading data from Emory (A & T Wingo) and metadata downloaded directly from synapse, as well as Nikhil’s subtypes.
Merging it all together and creating a new diagnosis variable that combines the cogdx, Braak, and CERAD scores, based on syn8456629.
Dementia that isn’t MCI or AD (cogdx=6) is removed so that “Other” includes only MCI
# Nikhil's subtypes, sorted by patient ID and indexed by it
subtypes <- readRDS("data/milind2019/rosmap_patient_subtypes.RDS") %>%
  arrange(Patient)
rownames(subtypes) <- subtypes$Patient

# Proteomics data from T&A Wingo.
# The ID column was referred to by readr's auto-generated name "X1"; newer
# readr versions auto-name an unnamed first column "...1" instead, so accept
# either ("X1" still wins when present).
# After t(), row names of `prot` are the proteomics sample IDs.
prot <- read_csv("data/proteomics_n391_residual_log2_batchMSsexPMIageStudy.csv") %>%
  column_to_rownames(var = intersect(c("X1", "...1"), names(.))[1]) %>%
  t()

# Key between proteomics ID and project ID (from T&A Wingo),
# restricted to samples present in the proteomics matrix
protid <- read_csv("data/metadata/proteomics_ROS_MAP_TRAITS_clean.csv") %>%
  dplyr::select(-Batch) %>%
  dplyr::filter(proteomicsid %in% rownames(prot))

# Clinical (covariates) data syn3191087
meta.cl <- read_csv("data/metadata/ROSMAP_clinical_2019-05_v3.csv")

# syn3382527 to match patient IDs.
# The full outer merge can add ID-key rows that have no proteomics sample, so
# filter on proteomicsid again, then keep one row per projid.
key <- read_csv("data/metadata/ROSMAP_IDkey.csv") %>%
  dplyr::select(projid, rnaseq_id, wgs_id) %>%
  merge(protid, by = "projid", all = TRUE) %>%
  dplyr::filter(proteomicsid %in% rownames(prot)) %>%
  dplyr::filter(!duplicated(projid))

# Inner-join the clinical data, then left-join Nikhil's subtype info
# (samples without a subtype keep NA in the subtype columns).
meta <- merge(key, meta.cl, by = "projid", all = FALSE) %>%
  merge(subtypes, by.x = "rnaseq_id", by.y = "Patient",
        all.x = TRUE, all.y = FALSE) %>%
  arrange(proteomicsid) # fixed sample order, reused to index `prot` below
# Combined diagnosis based on syn8456629.
# Samples with cogdx == 6 (dementia that is neither MCI nor AD) are dropped
# first, so the fall-through "MCI" label does not absorb them.
meta <- dplyr::filter(meta, cogdx != 6)
meta <- mutate(
  meta,
  diagnosis = if_else(
    cogdx == 4 & braaksc >= 4 & ceradsc <= 2,
    "AD",
    if_else(
      cogdx == 1 & braaksc <= 3 & ceradsc >= 3,
      "Control",
      "MCI"
    )
  )
)
# Same as diagnosis from Nikhil's work (syn11024258)
# meta %>% dplyr::filter(diagnosis=="AD") %>% dplyr::select(Subtype) %>% unique()
# meta %>% dplyr::filter(diagnosis=="Control") %>% dplyr::select(Subtype) %>% unique()
# Remove proteins with missing data
# 17% of proteins have >20% missing data; 8% of samples are missing >20%
# 4065 proteins with any missing data
# Rows are taken in `meta` order so samples line up with the metadata.
data <- prot[meta$proteomicsid, , drop = FALSE]
# Column indices of proteins with at least one missing value
naic <- which(colSums(is.na(data)) > 0)
# Guard the negative index: with no incomplete proteins, data[, -integer(0)]
# would select zero columns and silently discard the whole matrix.
if (length(naic) > 0) {
  data <- data[, -naic, drop = FALSE]
}
# Per-sample plotting labels keyed by proteomics ID: the diagnosis by default,
# replaced by the subtype letter for samples assigned to subtype A or B.
labels <- setNames(as.character(meta$diagnosis), meta$proteomicsid)
labels[which(meta$Subtype == "A")] <- "A"
labels[which(meta$Subtype == "B")] <- "B"
There is almost complete overlap between AD, Control, and MCI, with MCI being more spread out than AD and Control.
It takes 206 PC’s to cover 80% of the variance.
The first ~23 PC’s are above random based on the broken stick model, covering together 25% of the variance.
# PCA on the complete-case protein matrix (samples in rows)
pc <- prcomp(data)
#pdf("output/1_fit_archetypes/PCAscreeplot_bs.pdf", width=6, height=7)
# Scree plot with the broken-stick reference for the first 35 PCs
p1 <- screeplot(pc, bstick = TRUE, npcs = 35, main = NULL) # First ~23 PC's are above random
# Proportion of variance explained per PC. prcomp() returns standard
# deviations, so they must be squared before taking the ratio.
rel.ev <- pc$sdev^2 / sum(pc$sdev^2)
# Index of the first PC at which the cumulative proportion exceeds 80%.
# NOTE(review): the 206-PC figure quoted elsewhere in this document was
# obtained from the unsquared sdev ratio and should be re-derived from e8.
e8 <- which(cumsum(rel.ev) > 0.8)[1]
plot(cumsum(rel.ev), ylab = "Cumulative proportion of explained variance")
# Red guide lines marking the 80% level and the PC index where it is reached
segments(0, 0.8, e8, 0.8, col = "red", lty = 4)
segments(e8, 0.8, e8, 0, col = "red", lty = 4)
#dev.off()
# Samples on PC1/PC2, coloured by combined diagnosis
autoplot(pc, data = meta, colour = 'diagnosis')
# Number of leading PCs to fit on (the ~23 PCs above the broken-stick model)
p <- 23
# AD samples only. %in% never yields NA, so a sample with a missing diagnosis
# cannot turn into an NA row subscript (which would make pc$x[cases, ] fail).
cases <- meta$proteomicsid[meta$diagnosis %in% "AD"]
# PC scores transposed to dimensions x samples; drop = FALSE keeps a matrix
# even if only one sample is selected.
pcs4arch <- t(pc$x[cases, seq_len(p), drop = FALSE])
Variance explained by different polytopes:
# Fit polytopes with 2 to 6 vertices, with 200 bootstrap iterations per k
arc_ks <- k_fit_pch(
  pcs4arch,
  ks = 2:6,
  check_installed = TRUE,
  bootstrap = TRUE,
  bootstrap_N = 200,
  bootstrap_type = "s",
  sample_prop = 0.75,
  seed = 2543,
  maxiter = 1000,
  conv_crit = 1e-04,
  delta = 0,
  order_type = "align",
  volume_ratio = "none" # set to "none" if too slow
)

# Diagnostic curves across k, shown one after another
p1 <- plot_arc_var(arc_ks, type = "varexpl", line_size = 1.5, point_size = 2) +
  theme_bw()
p2 <- plot_arc_var(arc_ks, type = "res_varexpl", line_size = 1.5, point_size = 2) +
  theme_bw()
p3 <- plot_arc_var(arc_ks, type = "total_var", line_size = 1.5, point_size = 2) +
  theme_bw() +
  ylab("Mean variance in position of vertices") # look for the highest k that gives reasonably low variance
p1
p2
p3
rm(p1, p2, p3)

# ---- k = 2 ----
k <- 2
arcfit <- fit_pch_bootstrap(
  pcs4arch,
  n = 200,
  sample_prop = 0.75,
  seed = 235,
  noc = k,
  delta = 0,
  conv_crit = 1e-04,
  type = "cmq"
)
# Open points are the bootstrapped fits, showing the variance around each archetype
plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:2,
  data_lab = labels[cases],
  line_size = 1.5,
  text_size = 6,
  data_size = 3
)
# 3D version is stored but not displayed here
pl3 <- plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:3,
  data_lab = labels[cases],
  line_size = 1.5,
  text_size = 24,
  data_size = 3
  #colors= palette(rainbow(6))
)

# ---- k = 3 ----
k <- 3
arcfit <- fit_pch_bootstrap(
  pcs4arch,
  n = 200,
  sample_prop = 0.75,
  seed = 235,
  noc = k,
  delta = 0,
  conv_crit = 1e-04,
  type = "cmq"
)
# Open points are the bootstrapped fits, showing the variance around each archetype
plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:2,
  data_lab = labels[cases],
  line_size = 1.5,
  text_size = 6,
  data_size = 3
)
pl3 <- plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:3,
  data_lab = labels[cases],
  line_size = 1.5,
  text_size = 24,
  data_size = 3
  #colors= palette(rainbow(6))
)

# ---- k = 4 ----
k <- 4
arcfit <- fit_pch_bootstrap(
  pcs4arch,
  n = 200,
  sample_prop = 0.75,
  seed = 235,
  noc = k,
  delta = 0,
  conv_crit = 1e-04,
  type = "cmq"
)
# Open points are the bootstrapped fits, showing the variance around each archetype.
# From here on points are labelled by diagnosis rather than by `labels`.
plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:3,
  data_lab = meta[meta$proteomicsid %in% cases, "diagnosis"],
  line_size = 1.5,
  text_size = 24,
  data_size = 3
  #colors= palette(rainbow(6))
)
p_pca <- plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:2,
  data_lab = meta[meta$proteomicsid %in% cases, "diagnosis"],
  line_size = 1.5,
  text_size = 6,
  data_size = 3
)
# Number of leading PCs to fit on (the PC count reported above as covering
# 80% of the variance)
p <- 206
# AD samples only. %in% never yields NA, so a sample with a missing diagnosis
# cannot turn into an NA row subscript (which would make pc$x[cases, ] fail).
cases <- meta$proteomicsid[meta$diagnosis %in% "AD"]
# PC scores transposed to dimensions x samples; drop = FALSE keeps a matrix
# even if only one sample is selected.
pcs4arch <- t(pc$x[cases, seq_len(p), drop = FALSE])
Variance explained by different polytopes:
# Fit polytopes with 2 to 6 vertices, with 200 bootstrap iterations per k
arc_ks <- k_fit_pch(
  pcs4arch,
  ks = 2:6,
  check_installed = TRUE,
  bootstrap = TRUE,
  bootstrap_N = 200,
  bootstrap_type = "s",
  sample_prop = 0.75,
  seed = 2543,
  maxiter = 1000,
  conv_crit = 1e-04,
  delta = 0,
  order_type = "align",
  volume_ratio = "none" # set to "none" if too slow
)

# Diagnostic curves across k, shown one after another
p1 <- plot_arc_var(arc_ks, type = "varexpl", line_size = 1.5, point_size = 2) +
  theme_bw()
p2 <- plot_arc_var(arc_ks, type = "res_varexpl", line_size = 1.5, point_size = 2) +
  theme_bw()
p3 <- plot_arc_var(arc_ks, type = "total_var", line_size = 1.5, point_size = 2) +
  theme_bw() +
  ylab("Mean variance in position of vertices") # look for the highest k that gives reasonably low variance
p1
p2
p3
rm(p1, p2, p3)

# ---- k = 2 ----
k <- 2
arcfit <- fit_pch_bootstrap(
  pcs4arch,
  n = 200,
  sample_prop = 0.75,
  seed = 235,
  noc = k,
  delta = 0,
  conv_crit = 1e-04,
  type = "cmq"
)
# Open points are the bootstrapped fits, showing the variance around each archetype
plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:3,
  data_lab = labels[cases],
  line_size = 1.5,
  text_size = 24,
  data_size = 3
  #colors= palette(rainbow(6))
)
# 2D version is stored but not displayed here
p_pca <- plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:2,
  data_lab = labels[cases],
  line_size = 1.5,
  text_size = 6,
  data_size = 3
)

# ---- k = 3 ----
k <- 3
arcfit <- fit_pch_bootstrap(
  pcs4arch,
  n = 200,
  sample_prop = 0.75,
  seed = 235,
  noc = k,
  delta = 0,
  conv_crit = 1e-04,
  type = "cmq"
)
# Open points are the bootstrapped fits, showing the variance around each archetype.
# From here on points are labelled by diagnosis rather than by `labels`.
plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:3,
  data_lab = meta[meta$proteomicsid %in% cases, "diagnosis"],
  line_size = 1.5,
  text_size = 24,
  data_size = 3
  #colors= palette(rainbow(6))
)
p_pca <- plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:2,
  data_lab = meta[meta$proteomicsid %in% cases, "diagnosis"],
  line_size = 1.5,
  text_size = 6,
  data_size = 3
)

# ---- k = 4 ----
k <- 4
arcfit <- fit_pch_bootstrap(
  pcs4arch,
  n = 200,
  sample_prop = 0.75,
  seed = 235,
  noc = k,
  delta = 0,
  conv_crit = 1e-04,
  type = "cmq"
)
# Open points are the bootstrapped fits, showing the variance around each archetype
plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:3,
  data_lab = meta[meta$proteomicsid %in% cases, "diagnosis"],
  line_size = 1.5,
  text_size = 24,
  data_size = 3
  #colors= palette(rainbow(6))
)
p_pca <- plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:2,
  data_lab = meta[meta$proteomicsid %in% cases, "diagnosis"],
  line_size = 1.5,
  text_size = 6,
  data_size = 3
)
# Fit on the first 23 PCs, using samples labelled AD or MCI
p <- 23
cases <- with(meta, proteomicsid[diagnosis %in% c("AD", "MCI")])
# Scores transposed to dimensions x samples
pcs4arch <- t(pc$x[cases, seq_len(p)])
Variance explained by different polytopes:
# Fit polytopes with 2 to 6 vertices, with 200 bootstrap iterations per k
arc_ks <- k_fit_pch(
  pcs4arch,
  ks = 2:6,
  check_installed = TRUE,
  bootstrap = TRUE,
  bootstrap_N = 200,
  bootstrap_type = "s",
  sample_prop = 0.75,
  seed = 2543,
  maxiter = 1000,
  conv_crit = 1e-04,
  delta = 0,
  order_type = "align",
  volume_ratio = "none" # set to "none" if too slow
)

# Diagnostic curves across k, shown one after another
p1 <- plot_arc_var(arc_ks, type = "varexpl", line_size = 1.5, point_size = 2) +
  theme_bw()
p2 <- plot_arc_var(arc_ks, type = "res_varexpl", line_size = 1.5, point_size = 2) +
  theme_bw()
p3 <- plot_arc_var(arc_ks, type = "total_var", line_size = 1.5, point_size = 2) +
  theme_bw() +
  ylab("Mean variance in position of vertices") # look for the highest k that gives reasonably low variance
p1
p2
p3
rm(p1, p2, p3)

# ---- k = 2 ----
k <- 2
arcfit <- fit_pch_bootstrap(
  pcs4arch,
  n = 200,
  sample_prop = 0.75,
  seed = 235,
  noc = k,
  delta = 0,
  conv_crit = 1e-04,
  type = "cmq"
)
# Open points are the bootstrapped fits, showing the variance around each archetype
plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:3,
  data_lab = labels[cases],
  line_size = 1.5,
  text_size = 24,
  data_size = 3
  #colors= palette(rainbow(6))
)
# 2D version is stored but not displayed here
p_pca <- plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:2,
  data_lab = labels[cases],
  line_size = 1.5,
  text_size = 6,
  data_size = 3
)

# ---- k = 3 ----
k <- 3
arcfit <- fit_pch_bootstrap(
  pcs4arch,
  n = 200,
  sample_prop = 0.75,
  seed = 235,
  noc = k,
  delta = 0,
  conv_crit = 1e-04,
  type = "cmq"
)
# Open points are the bootstrapped fits, showing the variance around each archetype.
# From here on points are labelled by diagnosis rather than by `labels`.
plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:3,
  data_lab = meta[meta$proteomicsid %in% cases, "diagnosis"],
  line_size = 1.5,
  text_size = 24,
  data_size = 3
  #colors= palette(rainbow(6))
)
p_pca <- plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:2,
  data_lab = meta[meta$proteomicsid %in% cases, "diagnosis"],
  line_size = 1.5,
  text_size = 6,
  data_size = 3
)

# ---- k = 4 ----
k <- 4
arcfit <- fit_pch_bootstrap(
  pcs4arch,
  n = 200,
  sample_prop = 0.75,
  seed = 235,
  noc = k,
  delta = 0,
  conv_crit = 1e-04,
  type = "cmq"
)
# Open points are the bootstrapped fits, showing the variance around each archetype
plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:3,
  data_lab = meta[meta$proteomicsid %in% cases, "diagnosis"],
  line_size = 1.5,
  text_size = 24,
  data_size = 3
  #colors= palette(rainbow(6))
)
p_pca <- plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:2,
  data_lab = meta[meta$proteomicsid %in% cases, "diagnosis"],
  line_size = 1.5,
  text_size = 6,
  data_size = 3
)
# Fit on the first 206 PCs, using samples labelled AD or MCI
p <- 206
cases <- with(meta, proteomicsid[diagnosis %in% c("AD", "MCI")])
# Scores transposed to dimensions x samples
pcs4arch <- t(pc$x[cases, seq_len(p)])
Variance explained by different polytopes:
# Fit polytopes with 2 to 6 vertices, with 200 bootstrap iterations per k
arc_ks <- k_fit_pch(
  pcs4arch,
  ks = 2:6,
  check_installed = TRUE,
  bootstrap = TRUE,
  bootstrap_N = 200,
  bootstrap_type = "s",
  sample_prop = 0.75,
  seed = 2543,
  maxiter = 1000,
  conv_crit = 1e-04,
  delta = 0,
  order_type = "align",
  volume_ratio = "none" # set to "none" if too slow
)

# Diagnostic curves across k, shown one after another
p1 <- plot_arc_var(arc_ks, type = "varexpl", line_size = 1.5, point_size = 2) +
  theme_bw()
p2 <- plot_arc_var(arc_ks, type = "res_varexpl", line_size = 1.5, point_size = 2) +
  theme_bw()
p3 <- plot_arc_var(arc_ks, type = "total_var", line_size = 1.5, point_size = 2) +
  theme_bw() +
  ylab("Mean variance in position of vertices") # look for the highest k that gives reasonably low variance
p1
p2
p3
rm(p1, p2, p3)

# ---- k = 2 ----
k <- 2
arcfit <- fit_pch_bootstrap(
  pcs4arch,
  n = 200,
  sample_prop = 0.75,
  seed = 235,
  noc = k,
  delta = 0,
  conv_crit = 1e-04,
  type = "cmq"
)
# Open points are the bootstrapped fits, showing the variance around each archetype
plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:3,
  data_lab = labels[cases],
  line_size = 1.5,
  text_size = 24,
  data_size = 3
  #colors= palette(rainbow(6))
)
# 2D version is stored but not displayed here
p_pca <- plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:2,
  data_lab = labels[cases],
  line_size = 1.5,
  text_size = 6,
  data_size = 3
)

# ---- k = 3 ----
k <- 3
arcfit <- fit_pch_bootstrap(
  pcs4arch,
  n = 200,
  sample_prop = 0.75,
  seed = 235,
  noc = k,
  delta = 0,
  conv_crit = 1e-04,
  type = "cmq"
)
# Open points are the bootstrapped fits, showing the variance around each archetype.
# From here on points are labelled by diagnosis rather than by `labels`.
plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:3,
  data_lab = meta[meta$proteomicsid %in% cases, "diagnosis"],
  line_size = 1.5,
  text_size = 24,
  data_size = 3
  #colors= palette(rainbow(6))
)
p_pca <- plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:2,
  data_lab = meta[meta$proteomicsid %in% cases, "diagnosis"],
  line_size = 1.5,
  text_size = 6,
  data_size = 3
)

# ---- k = 4 ----
k <- 4
arcfit <- fit_pch_bootstrap(
  pcs4arch,
  n = 200,
  sample_prop = 0.75,
  seed = 235,
  noc = k,
  delta = 0,
  conv_crit = 1e-04,
  type = "cmq"
)
# Open points are the bootstrapped fits, showing the variance around each archetype
plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:3,
  data_lab = meta[meta$proteomicsid %in% cases, "diagnosis"],
  line_size = 1.5,
  text_size = 24,
  data_size = 3
  #colors= palette(rainbow(6))
)
p_pca <- plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:2,
  data_lab = meta[meta$proteomicsid %in% cases, "diagnosis"],
  line_size = 1.5,
  text_size = 6,
  data_size = 3
)
# Fit on the first 23 PCs, using samples labelled AD, MCI or Control
p <- 23
cases <- with(meta, proteomicsid[diagnosis %in% c("AD", "MCI", "Control")])
# Scores transposed to dimensions x samples
pcs4arch <- t(pc$x[cases, seq_len(p)])
Variance explained by different polytopes:
# Fit polytopes with 2 to 6 vertices, with 200 bootstrap iterations per k
arc_ks <- k_fit_pch(
  pcs4arch,
  ks = 2:6,
  check_installed = TRUE,
  bootstrap = TRUE,
  bootstrap_N = 200,
  bootstrap_type = "s",
  sample_prop = 0.75,
  seed = 2543,
  maxiter = 1000,
  conv_crit = 1e-04,
  delta = 0,
  order_type = "align",
  volume_ratio = "none" # set to "none" if too slow
)

# Diagnostic curves across k, shown one after another
p1 <- plot_arc_var(arc_ks, type = "varexpl", line_size = 1.5, point_size = 2) +
  theme_bw()
p2 <- plot_arc_var(arc_ks, type = "res_varexpl", line_size = 1.5, point_size = 2) +
  theme_bw()
p3 <- plot_arc_var(arc_ks, type = "total_var", line_size = 1.5, point_size = 2) +
  theme_bw() +
  ylab("Mean variance in position of vertices") # look for the highest k that gives reasonably low variance
p1
p2
p3
rm(p1, p2, p3)

# ---- k = 2 ----
k <- 2
arcfit <- fit_pch_bootstrap(
  pcs4arch,
  n = 200,
  sample_prop = 0.75,
  seed = 235,
  noc = k,
  delta = 0,
  conv_crit = 1e-04,
  type = "cmq"
)
# Open points are the bootstrapped fits, showing the variance around each archetype
plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:3,
  data_lab = labels[cases],
  line_size = 1.5,
  text_size = 24,
  data_size = 3
  #colors= palette(rainbow(6))
)
# 2D version is stored but not displayed here
p_pca <- plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:2,
  data_lab = labels[cases],
  line_size = 1.5,
  text_size = 6,
  data_size = 3
)

# ---- k = 3 ----
k <- 3
arcfit <- fit_pch_bootstrap(
  pcs4arch,
  n = 200,
  sample_prop = 0.75,
  seed = 235,
  noc = k,
  delta = 0,
  conv_crit = 1e-04,
  type = "cmq"
)
# Open points are the bootstrapped fits, showing the variance around each archetype.
# From here on points are labelled by diagnosis rather than by `labels`.
plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:3,
  data_lab = meta[meta$proteomicsid %in% cases, "diagnosis"],
  line_size = 1.5,
  text_size = 24,
  data_size = 3
  #colors= palette(rainbow(6))
)
p_pca <- plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:2,
  data_lab = meta[meta$proteomicsid %in% cases, "diagnosis"],
  line_size = 1.5,
  text_size = 6,
  data_size = 3
)

# ---- k = 4 ----
k <- 4
arcfit <- fit_pch_bootstrap(
  pcs4arch,
  n = 200,
  sample_prop = 0.75,
  seed = 235,
  noc = k,
  delta = 0,
  conv_crit = 1e-04,
  type = "cmq"
)
# Open points are the bootstrapped fits, showing the variance around each archetype
plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:3,
  data_lab = meta[meta$proteomicsid %in% cases, "diagnosis"],
  line_size = 1.5,
  text_size = 24,
  data_size = 3
  #colors= palette(rainbow(6))
)
p_pca <- plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:2,
  data_lab = meta[meta$proteomicsid %in% cases, "diagnosis"],
  line_size = 1.5,
  text_size = 6,
  data_size = 3
)
# Fit on the first 206 PCs, using samples labelled AD, MCI or Control
p <- 206
cases <- with(meta, proteomicsid[diagnosis %in% c("AD", "MCI", "Control")])
# Scores transposed to dimensions x samples
pcs4arch <- t(pc$x[cases, seq_len(p)])
Variance explained by different polytopes:
# Fit polytopes with 2 to 6 vertices, with 200 bootstrap iterations per k
arc_ks <- k_fit_pch(
  pcs4arch,
  ks = 2:6,
  check_installed = TRUE,
  bootstrap = TRUE,
  bootstrap_N = 200,
  bootstrap_type = "s",
  sample_prop = 0.75,
  seed = 2543,
  maxiter = 1000,
  conv_crit = 1e-04,
  delta = 0,
  order_type = "align",
  volume_ratio = "none" # set to "none" if too slow
)

# Diagnostic curves across k, shown one after another
p1 <- plot_arc_var(arc_ks, type = "varexpl", line_size = 1.5, point_size = 2) +
  theme_bw()
p2 <- plot_arc_var(arc_ks, type = "res_varexpl", line_size = 1.5, point_size = 2) +
  theme_bw()
p3 <- plot_arc_var(arc_ks, type = "total_var", line_size = 1.5, point_size = 2) +
  theme_bw() +
  ylab("Mean variance in position of vertices") # look for the highest k that gives reasonably low variance
p1
p2
p3
rm(p1, p2, p3)

# ---- k = 2 ----
k <- 2
arcfit <- fit_pch_bootstrap(
  pcs4arch,
  n = 200,
  sample_prop = 0.75,
  seed = 235,
  noc = k,
  delta = 0,
  conv_crit = 1e-04,
  type = "cmq"
)
# Open points are the bootstrapped fits, showing the variance around each archetype
plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:3,
  data_lab = labels[cases],
  line_size = 1.5,
  text_size = 24,
  data_size = 3
  #colors= palette(rainbow(6))
)
# 2D version is stored but not displayed here
p_pca <- plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:2,
  data_lab = labels[cases],
  line_size = 1.5,
  text_size = 6,
  data_size = 3
)

# ---- k = 3 ----
k <- 3
arcfit <- fit_pch_bootstrap(
  pcs4arch,
  n = 200,
  sample_prop = 0.75,
  seed = 235,
  noc = k,
  delta = 0,
  conv_crit = 1e-04,
  type = "cmq"
)
# Open points are the bootstrapped fits, showing the variance around each archetype.
# From here on points are labelled by diagnosis rather than by `labels`.
plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:3,
  data_lab = meta[meta$proteomicsid %in% cases, "diagnosis"],
  line_size = 1.5,
  text_size = 24,
  data_size = 3
  #colors= palette(rainbow(6))
)
p_pca <- plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:2,
  data_lab = meta[meta$proteomicsid %in% cases, "diagnosis"],
  line_size = 1.5,
  text_size = 6,
  data_size = 3
)

# ---- k = 4 ----
k <- 4
arcfit <- fit_pch_bootstrap(
  pcs4arch,
  n = 200,
  sample_prop = 0.75,
  seed = 235,
  noc = k,
  delta = 0,
  conv_crit = 1e-04,
  type = "cmq"
)
# Open points are the bootstrapped fits, showing the variance around each archetype
plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:3,
  data_lab = meta[meta$proteomicsid %in% cases, "diagnosis"],
  line_size = 1.5,
  text_size = 24,
  data_size = 3
  #colors= palette(rainbow(6))
)
p_pca <- plot_arc(
  arc_data = arcfit,
  data = pcs4arch,
  which_dimensions = 1:2,
  data_lab = meta[meta$proteomicsid %in% cases, "diagnosis"],
  line_size = 1.5,
  text_size = 6,
  data_size = 3
)
Calculating the Euclidean distance of each sample (including control and other) from each of the archetypes.
Archetype position is calculated as the mean of the bootstrapped scores around each archetype.
Each sample is classified to the closest archetype.
This information is added to the metadata file.
# Mean archetype positions over the bootstrap fits; after t() there is one
# row per archetype and one column per PC used in the fit.
Xpca <- t(average_pch_fits(arcfit)$XC)
# Take the archetype count and dimensionality from the fit itself instead of
# hard-coding three names: a fixed 3-name vector fails whenever the current
# `arcfit` was made with a different number of archetypes.
n_arch <- nrow(Xpca)
n_dims <- ncol(Xpca)
rownames(Xpca) <- paste0("Archetype_", seq_len(n_arch))

# PC scores of every sample (not only those used for fitting), dims x samples
allscores <- t(pc$x[, seq_len(n_dims), drop = FALSE])

# Euclidean distance of each sample from each archetype (samples x archetypes).
# Xpca[i, ] has one value per PC, so it recycles down each sample column.
arch_cols <- paste0("archetype_", seq_len(n_arch))
arch_dist <- matrix(NA_real_, nrow = ncol(allscores), ncol = n_arch,
                    dimnames = list(colnames(allscores), arch_cols))
for (i in seq_len(n_arch)) {
  arch_dist[, i] <- sqrt(colSums((allscores - Xpca[i, ])^2))
}
archetypes <- as.data.frame(arch_dist)

# Classify each sample to its closest archetype; which.min returns exactly one
# index per sample (the first on ties), so the result is always a plain vector.
archetypes$archetype <- arch_cols[apply(arch_dist, 1, which.min)]

# Row names are proteomics IDs, so the join key must be stored under that name
# for merge(by = "proteomicsid") to find it in both tables.
archetypes$proteomicsid <- rownames(archetypes)
archmeta <- merge(meta, archetypes, by = "proteomicsid")

# NOTE(review): the file name says AD / k3 / p20, while the most recent fit
# above this point uses AD+MCI+Control samples with k = 4 and p = 206.
# Confirm which fit is meant to be saved here.
out_file <- "output/proteomics/1_fit_archetypes/archetypes_meta_AD_k3p20.RDS"
dir.create(dirname(out_file), recursive = TRUE, showWarnings = FALSE)
saveRDS(archmeta, file = out_file)
# Record the R version, platform and loaded package versions for reproducibility
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 3.6.2 (2019-12-12)
## os macOS Mojave 10.14.6
## system x86_64, darwin15.6.0
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2020-04-11
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date lib
## abind 1.4-5 2016-07-21 [1]
## AnnotationDbi 1.48.0 2019-10-29 [1]
## AnnotationHub 2.18.0 2019-10-29 [1]
## askpass 1.1 2019-01-13 [1]
## assertthat 0.2.1 2019-03-21 [1]
## backports 1.1.6 2020-04-05 [1]
## Biobase 2.46.0 2019-10-29 [1]
## BiocFileCache 1.10.2 2019-11-08 [1]
## BiocGenerics 0.32.0 2019-10-29 [1]
## BiocManager 1.30.10 2019-11-16 [1]
## BiocParallel 1.20.1 2019-12-21 [1]
## BiocVersion 3.10.1 2019-06-06 [1]
## biomaRt * 2.42.1 2020-03-26 [1]
## bit 1.1-15.2 2020-02-10 [1]
## bit64 0.9-7 2017-05-08 [1]
## blob 1.2.1 2020-01-20 [1]
## broom 0.5.5 2020-02-29 [1]
## callr 3.4.3 2020-03-28 [1]
## cellranger 1.1.0 2016-07-27 [1]
## cli 2.0.2 2020-02-28 [1]
## cluster 2.1.0 2019-06-19 [1]
## clustermq 0.8.9 2020-02-29 [1]
## clusterProfiler * 3.14.3 2020-01-08 [1]
## codetools 0.2-16 2018-12-24 [1]
## colorspace 1.4-1 2019-03-18 [1]
## cowplot * 1.0.0 2019-07-11 [1]
## crayon 1.3.4 2017-09-16 [1]
## crosstalk 1.1.0.1 2020-03-13 [1]
## curl 4.3 2019-12-02 [1]
## data.table * 1.12.8 2019-12-09 [1]
## DBI 1.1.0 2019-12-15 [1]
## dbplyr 1.4.2 2019-06-17 [1]
## desc 1.2.0 2018-05-01 [1]
## devtools 2.3.0 2020-04-10 [1]
## digest 0.6.25 2020-02-23 [1]
## DO.db 2.9 2019-11-19 [1]
## DOSE 3.12.0 2019-10-29 [1]
## dplyr * 0.8.5 2020-03-07 [1]
## ellipsis 0.3.0 2019-09-20 [1]
## enrichplot 1.6.1 2019-12-16 [1]
## europepmc 0.3 2018-04-20 [1]
## evaluate 0.14 2019-05-28 [1]
## fansi 0.4.1 2020-01-08 [1]
## farver 2.0.3 2020-01-16 [1]
## fastmap 1.0.1 2019-10-08 [1]
## fastmatch 1.1-0 2017-01-28 [1]
## fgsea 1.12.0 2019-10-29 [1]
## forcats * 0.5.0 2020-03-01 [1]
## fs 1.4.1 2020-04-04 [1]
## generics 0.0.2 2018-11-29 [1]
## geometry 0.4.5 2019-12-04 [1]
## ggforce 0.3.1 2019-08-20 [1]
## ggfortify * 0.4.9 2020-03-11 [1]
## ggplot2 * 3.3.0 2020-03-05 [1]
## ggplotify 0.0.5 2020-03-12 [1]
## ggraph 2.0.2 2020-03-17 [1]
## ggrepel 0.8.2 2020-03-08 [1]
## ggridges 0.5.2 2020-01-12 [1]
## glue 1.4.0 2020-04-03 [1]
## GO.db 3.10.0 2019-11-19 [1]
## GOSemSim 2.12.1 2020-03-19 [1]
## graphlayouts 0.6.0 2020-03-09 [1]
## gridExtra 2.3 2017-09-09 [1]
## gridGraphics 0.5-0 2020-02-25 [1]
## gtable 0.3.0 2019-03-25 [1]
## haven 2.2.0 2019-11-08 [1]
## hms 0.5.3 2020-01-08 [1]
## htmltools 0.4.0 2019-10-04 [1]
## htmlwidgets 1.5.1 2019-10-08 [1]
## httpuv 1.5.2 2019-09-11 [1]
## httr 1.4.1 2019-08-05 [1]
## igraph 1.2.5 2020-03-19 [1]
## interactiveDisplayBase 1.24.0 2019-10-29 [1]
## IRanges 2.20.2 2020-01-13 [1]
## jsonlite 1.6.1 2020-02-02 [1]
## knitr 1.28 2020-02-06 [1]
## labeling 0.3 2014-08-23 [1]
## later 1.0.0 2019-10-04 [1]
## lattice * 0.20-41 2020-04-02 [1]
## lazyeval 0.2.2 2019-03-15 [1]
## lifecycle 0.2.0 2020-03-06 [1]
## lpSolve * 5.6.15 2020-01-24 [1]
## lubridate 1.7.8 2020-04-06 [1]
## magic 1.5-9 2018-09-17 [1]
## magrittr 1.5 2014-11-22 [1]
## MASS 7.3-51.5 2019-12-20 [1]
## Matrix * 1.2-18 2019-11-27 [1]
## matrixStats 0.56.0 2020-03-13 [1]
## memoise 1.1.0 2017-04-21 [1]
## mgcv 1.8-31 2019-11-09 [1]
## mime 0.9 2020-02-04 [1]
## modelr 0.1.6 2020-02-22 [1]
## munsell 0.5.0 2018-06-12 [1]
## nlme 3.1-145 2020-03-04 [1]
## openssl 1.4.1 2019-07-18 [1]
## ParetoTI * 0.1.13 2020-04-11 [1]
## permute * 0.9-5 2019-03-12 [1]
## pillar 1.4.3 2019-12-20 [1]
## pkgbuild 1.0.6 2019-10-09 [1]
## pkgconfig 2.0.3 2019-09-22 [1]
## pkgload 1.0.2 2018-10-29 [1]
## plotly 4.9.2.1 2020-04-04 [1]
## plyr 1.8.6 2020-03-03 [1]
## polyclip 1.10-0 2019-03-14 [1]
## prettyunits 1.1.1 2020-01-24 [1]
## processx 3.4.2 2020-02-09 [1]
## progress 1.2.2 2019-05-16 [1]
## promises 1.1.0 2019-10-04 [1]
## ps 1.3.2 2020-02-13 [1]
## purrr * 0.3.3 2019-10-18 [1]
## qvalue 2.18.0 2019-10-29 [1]
## R6 2.4.1 2019-11-12 [1]
## rappdirs 0.3.1 2016-03-28 [1]
## RColorBrewer 1.1-2 2014-12-07 [1]
## Rcpp 1.0.4.6 2020-04-09 [1]
## readr * 1.3.1 2018-12-21 [1]
## readxl 1.3.1 2019-03-13 [1]
## remotes 2.1.1 2020-02-15 [1]
## reprex 0.3.0 2019-05-16 [1]
## reshape2 1.4.4 2020-04-09 [1]
## reticulate * 1.15 2020-04-02 [1]
## rlang 0.4.5 2020-03-01 [1]
## rmarkdown 2.1 2020-01-20 [1]
## rprojroot 1.3-2 2018-01-03 [1]
## RSQLite 2.2.0 2020-01-07 [1]
## rstudioapi 0.11 2020-02-07 [1]
## rvcheck 0.1.8 2020-03-01 [1]
## rvest 0.3.5 2019-11-08 [1]
## rzmq 0.9.7 2020-01-31 [1]
## S4Vectors 0.24.4 2020-04-09 [1]
## scales 1.1.0 2019-11-18 [1]
## sessioninfo 1.1.1 2018-11-05 [1]
## shiny 1.4.0.2 2020-03-13 [1]
## stringi 1.4.6 2020-02-17 [1]
## stringr * 1.4.0 2019-02-10 [1]
## testthat 2.3.2 2020-03-02 [1]
## tibble * 3.0.0 2020-03-30 [1]
## tidygraph 1.1.2 2019-02-18 [1]
## tidyr * 1.0.2 2020-01-24 [1]
## tidyselect 1.0.0 2020-01-27 [1]
## tidyverse * 1.3.0 2019-11-21 [1]
## triebeard 0.3.0 2016-08-04 [1]
## tweenr 1.0.1 2018-12-14 [1]
## urltools 1.7.3 2019-04-14 [1]
## usethis 1.6.0 2020-04-09 [1]
## vctrs 0.2.4 2020-03-10 [1]
## vegan * 2.5-6 2019-09-01 [1]
## viridis 0.5.1 2018-03-29 [1]
## viridisLite 0.3.0 2018-02-01 [1]
## withr 2.1.2 2018-03-15 [1]
## xfun 0.12 2020-01-13 [1]
## XML 3.99-0.3 2020-01-20 [1]
## xml2 1.3.1 2020-04-09 [1]
## xtable 1.8-4 2019-04-21 [1]
## yaml 2.2.1 2020-02-01 [1]
## source
## CRAN (R 3.6.0)
## Bioconductor
## Bioconductor
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.2)
## Bioconductor
## Bioconductor
## Bioconductor
## CRAN (R 3.6.0)
## Bioconductor
## Bioconductor
## Bioconductor
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.2)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.2)
## CRAN (R 3.6.0)
## Bioconductor
## CRAN (R 3.6.2)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.1)
## CRAN (R 3.6.0)
## Bioconductor
## Bioconductor
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## Bioconductor
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## Bioconductor
## CRAN (R 3.6.0)
## CRAN (R 3.6.2)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.2)
## Bioconductor
## Bioconductor
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## Bioconductor
## Bioconductor
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.2)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.2)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.2)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.2)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## Github (vitkl/ParetoTI@5109906)
## CRAN (R 3.6.0)
## CRAN (R 3.6.1)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.2)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## Bioconductor
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.1)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.2)
## CRAN (R 3.6.2)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.2)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.1)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
## CRAN (R 3.6.2)
## CRAN (R 3.6.0)
## CRAN (R 3.6.0)
##
## [1] /Library/Frameworks/R.framework/Versions/3.6/Resources/library